*02_clean_closure_data.do

* ---------------------------------------------------------------------------
* Session setup: close any log left open, clear the data in memory, define
* the project path globals, and open the log for this script.
* ---------------------------------------------------------------------------
capture log close
clear
set linesize 255

global root    "/disk/bulkw/nencka/schooling_pandemic/2021_10_18_final/"
global input   "$root/Input"
global scripts "$root/Scripts"
global temp    "$root/Temp"
* NOTE: the output global used to be defined twice (first as the Output
* folder, then as its predictors subfolder). Only the second definition ever
* took effect, so the overwritten first one was removed. The effective value
* is unchanged, and this script itself never reads the output global.
global output  "$root/Output/predictors"
global log     "$root/Log"


log using "$log/02_clean_closure_data.log", replace text



*****************************************************************************
*****************************************************************************
*****************************************************************************


* State-level school closure orders
* Reads the state closure CSV, attaches state FIPS codes with statastates,
* and saves a state-level file holding a 0/1 indicator for whether the state
* had any closure order or recommendation.

import delimited using "$input/state_school_closures_data.csv", clear

* Discard the first imported row, then give the two columns usable names
* (presumably row 1 carries the header text of the CSV; confirm against the file)
drop if _n == 1
rename v1 state
rename v2 type

* Recode the D.C. label before matching on state name
replace state = "District of Columbia" if state == "Washington, D.C."
statastates, name(state)

* Stop unless every observation has _merge equal to 3
assert _merge==3

rename (state_fips state) (statefip state_name)

keep statefip type state_name
sort statefip

* Indicator is 0 when type is "no_order" or "none", and 1 otherwise
gen order_or_rec = !inlist(type, "no_order", "none")
tab type order_or_rec 
save "$temp/state_closures.dta", replace


*****************************************************************************
*****************************************************************************
*****************************************************************************


*Load town school closure data
* Reads the city-level closure spreadsheet, builds numeric and binned measures
* of days closed, attaches state FIPS codes with statastates, and saves a
* town-level file.

import excel using "$input/Spanish Flu (City School Closures) feb_2021.xlsx", sheet("Sheet1") clear first
rename *, lower

desc, fullnames
di _N

* Keep only rows that name a city
keep if !missing(cities)

list state cities numdaysclosed
* force: entries that are not numeric (such as "closed") become missing in days_closed
destring numdaysclosed, gen(days_closed) force
sum days_closed, d
tab days_closed, m

* Binned days closed:
*   1 = 0 to 21 days, 2 = 22 to 35 days, 3 = 36 to 150 days,
*   4 = spreadsheet records only "closed" with no day count.
* Values outside these ranges (and missing values) leave the bin missing.
gen bdays_closed = .
replace bdays_closed = 1 if inrange(days_closed,0,21)
replace bdays_closed = 2 if inrange(days_closed,22,35)
replace bdays_closed = 3 if inrange(days_closed,36,150)
replace bdays_closed = 4 if numdaysclosed == "closed"
tab bdays_closed, m



* Review the "closed" rows; their numeric day count is kept missing
list state cities if numdaysclosed == "closed"
tab days_closed if numdaysclosed == "closed", m
replace days_closed = . if numdaysclosed == "closed"
di _N

rename cities mcd

replace mcd= strlower(mcd)

*Drop New Bedford, Sacramento, Bangor, and Atlanta.
*These four cities have errors in the 1940 complete count decennial census data from IPUMS

list mcd state days_closed if inlist(mcd,"new bedford","sacramento","bangor","atlanta")
drop if inlist(mcd,"new bedford","sacramento","bangor","atlanta")


replace state = strproper(state)

statastates, name(state)

* Towns whose state name did not match (_merge equal to 1) are kept below and
* would carry a missing FIPS code into the saved file. Report them in the log
* so a spelling problem in the spreadsheet cannot pass unnoticed. This check
* is deliberately non-fatal and does not alter the data.
quietly count if _merge == 1
if r(N) > 0 {
    di as error "WARNING: " r(N) " town(s) did not match a state name; statefip will be missing for:"
    list state mcd if _merge == 1
}

* Remove states that appear only in the statastates lookup (no towns in this file)
drop if _merge == 2
drop _merge 

rename state_fips statefip
sort statefip mcd

* NPI days net of school closure days; missing whenever either input is missing
gen days_npi_net_schoolclosures = total_npi_days-days_closed

*Create flag for places where we could find days closed info
* 1 when days_closed is non-missing, 0 otherwise
gen in_daysclosed_sample = !mi(days_closed)
tab in_daysclosed_sample, mi 


keep statefip mcd days_closed bdays_closed total_npi_days days_npi_net_schoolclosures in_daysclosed_sample
list
desc, fullnames


save "$temp/school_closures_towns_1920.dta", replace

log close

